##-------------------------------------------------------------------------##
##
##  PROJECT TITLE:    When do voters respond to campaign finance disclosure? 
##                    Evidence from multiple election types
##
##  PROJECT AUTHOR:   THOMAS S. ROBINSON                        
##  EMAIL:            THOMAS.ROBINSON@DURHAM.AC.UK         
##
##  DESCRIPTION:      MAIN REPLICATION FILE     
##
##-------------------------------------------------------------------------##

#### Packages ####

# R version: 4.1.2 (2021-11-01)

library(nnet) # v7.3-16
library(cregg) #v0.4.0
library(tidyverse) #v1.3.1
library(dotwhisker) #v0.7.4
library(miceadds) #v3.11-6
library(broom) #v0.7.10
library(xtable) #v1.8-4
library(stargazer) #v5.2.2
library(sandwich) #v3.0-1
library(Hmisc) #v4.6-0

#### Init ####
source("conjoint_functions.R")

set.seed(89)

# Placeholder var. for issue with glm.clutser
wgt__ <- NULL

#### Data set up ####

## Candidate: Subjects exposed to partisan and ideological primes
cand_t_df <- read_csv("data/cand_treat_final.csv") %>%
  relevel_factors(.)

## Candidate: Subjects exposed only to CF information
cand_c_df <- read_csv("data/cand_control_final.csv") %>%
  relevel_factors(.) 

## Initiative issues
init_df <- read_csv("data/initiative_final.csv") %>%
  relevel_factors(.) 

issues <- unique(init_df$issue)

## Descriptive stats data
subj_desc <- init_df %>% 
  select(-average,-total,-largest,-prop,-origin,-camp,-vote,-rate,-issue) %>%
  filter(!duplicated(participantid))

#-------------------------------------------------------------------#
#### Experimental design statistics ####
#-------------------------------------------------------------------#

## No. in treatment group
length(unique(cand_t_df$participantid))

## Average score in the IET task
summary(init_df$SC0)

## Democratic bias in the sample
t.test(subj_desc[subj_desc$party_id == "A Democrat",]$ideology,
       subj_desc[subj_desc$party_id == "An independent",]$ideology)

sample_states <- unique(subj_desc$state)

sample_weightings <- subj_desc %>%
  group_by(state) %>% 
  summarise(weight = n()/nrow(.))

lean <- read_csv("data/gallup_lean.csv") %>% 
  filter(State %in% sample_states) %>% 
  left_join(sample_weightings, by = c("State" = "state")) %>% 
  mutate(wgt_lean = Diff*weight)

sum(lean$wgt_lean)

#### Subjects support of initiative policies ####

issue_approval_sig <- data.frame(Issue = rep(NA,4),
                                 p = rep(NA,4))

for (i in 1:4) {
  j <- issues[i]
  init_df_tmp <- init_df %>%
    filter(issue == j)
  
  issue_approval_sig[i,1] <- j
  issue_approval_sig[i,2] <- t.test(init_df_tmp[init_df_tmp$camp == "Support",]$rate,
                                    init_df_tmp[init_df_tmp$camp == "Oppose",]$rate)$p.value
  
}
rm(i,j, init_df_tmp)

#-------------------------------------------------------------------#
#### MAIN PAPER FIGURES ####
#-------------------------------------------------------------------#

#### Figure 1 ####

set.seed(89)

N <- 10000000
indiff <- rnorm(N, sd = 1)

x_shift <- 1
indiff_shift_x <- indiff + x_shift

p_shift <- 3.5
indiff_shift_p <- indiff - p_shift
indiff_shift_p_x <- indiff + x_shift - p_shift


panel_a <- data.frame(panel = "a",
                      x = density(indiff)$x,
                      y = density(indiff)$y,
                      type = "shift",
                      area = "blank")

panel_b <- data.frame(panel = "b",
                      x = c(density(indiff)$x,density(indiff_shift_x)$x),
                      y = c(density(indiff)$y,density(indiff_shift_x)$y),
                      type = rep(c("base","shift"), each = length(density(indiff)$x))) %>% 
  mutate(area = ifelse(x > 0 & x < x_shift & type == "shift","fill","blank"))

panel_c <- data.frame(panel = "c",
                      x = c(density(indiff_shift_p)$x,density(indiff_shift_p_x)$x),
                      y = c(density(indiff_shift_p)$y,density(indiff_shift_p_x)$y),
                      type = rep(c("base","shift"), each = length(density(indiff)$x))) %>% 
  mutate(area = ifelse(x > 0 & x < x_shift & type == "shift", "fill","blank"))

tails <- data.frame(panel = rep(c("a","b","c"),2),
                    x = rep(c(-8,8),3),
                    y = 0,
                    type = "shift",
                    area = "blank")

RUM_df <- rbind(panel_a,
                panel_b,
                panel_c,
                tails)


arrow_df <- data.frame(panel = c("b","c","c"),
                       arrow_start = c(0,-p_shift,0),
                       arrow_end = c(x_shift,x_shift-p_shift, -p_shift),
                       y_shift = c(0.05, 0.05, 0.2),
                       type = "shift")

effect_df <- data.frame(panel = c("b","c","c"),
                        text_x = c(x_shift/2,(x_shift-2*p_shift)/2,-p_shift/2),
                        text_y = c(0.1,0.1, 0.25),
                        text = c("D","D","P"),
                        type = "shift")


facet_labels <- c(a = "(i) Indifferent random utility model",
                  b = "(ii) Effect of disclosure (D) on difference in utility",
                  c = "(iii) Effect of disclosure (D) and partisanship (P) on difference in utility")

fig1 <- ggplot(RUM_df[RUM_df$panel == "b",], aes(x = x, y= y, linetype = type)) +
  geom_line() +
  geom_area(aes(fill = area, alpha = area)) +
  scale_fill_manual(values = c("white","red")) +
  scale_alpha_manual(values = c(0,0.5)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_segment(data = arrow_df[arrow_df$panel == "b",], 
               aes(x = arrow_start, xend = arrow_end,
                   y = max(density(indiff)$y)+y_shift, yend = max(density(indiff)$y)+y_shift),
               arrow = arrow(length = unit(0.03, "npc"))) +
  geom_text(data=effect_df[effect_df$panel == "b",], 
            aes(x = text_x, 
                y = max(density(indiff)$y) + text_y, 
                label = text),
            size = 3) +
  
  scale_linetype_manual(values = c("solid","dashed")) +
  scale_x_continuous(breaks = c(-6,0,6),
                     labels = c(expression(symbol('\254')*" Better off with Candidate A"),0,expression("Better off with Candidate B "*symbol('\256'))),
                     limits = c(-8,8)) +
  labs(x = expression("Difference in Utility Between Two Candidates ("*Delta*"U)"), y = expression("p("*Delta*"U)")) +
  theme_minimal() +
  theme(legend.position = "none",
        strip.background = element_rect(colour="white", fill="white"),
        strip.text = element_text(hjust = 0, face = "bold", size = 10),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.spacing = unit(2, "lines"),
        axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))

ggsave(plot = fig1, "figures/fig1.pdf", width = 5, height = 2, scale = 2)

#### Figure 2 ####

fig2 <- ggplot(RUM_df[RUM_df$panel == "c",], aes(x = x, y= y, linetype = type)) +
  geom_line() +
  geom_area(aes(fill = area, alpha = area)) +
  scale_fill_manual(values = c("white","red")) +
  scale_alpha_manual(values = c(0,0.5)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_segment(data = arrow_df[arrow_df$panel == "c",], 
               aes(x = arrow_start, xend = arrow_end,
                   y = max(density(indiff)$y)+y_shift, yend = max(density(indiff)$y)+y_shift),
               arrow = arrow(length = unit(0.03, "npc"))) +
  geom_text(data=effect_df[effect_df$panel == "c",], 
            aes(x = text_x, 
                y = max(density(indiff)$y) + text_y, 
                label = text),
            size = 3) +
  
  scale_linetype_manual(values = c("solid","dashed")) +
  scale_x_continuous(breaks = c(-6,0,6),
                     labels = c(expression(symbol('\254')*" Better off with Candidate A"),0,expression("Better off with Candidate B "*symbol('\256'))),
                     limits = c(-8,8)) +
  labs(x = expression("Difference in Utility Between Two Candidates ("*Delta*"U)"), y = expression("p("*Delta*"U)")) +
  theme_minimal() +
  theme(legend.position = "none",
        strip.background = element_rect(colour="white", fill="white"),
        strip.text = element_text(hjust = 0, face = "bold", size = 10),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.spacing = unit(2, "lines"),
        axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)))

ggsave(plot = fig2, "figures/fig2.pdf", width = 5, height = 2, scale = 2)

#### Figure 3 ####

# Screenshot of conjoint -- attached separately

#### Figure 4 ####
set.seed(89)
mod_control <- cluster_glm(formula = vote ~ average + total + largest + prop + origin, 
                           data = cand_c_df, type="ols", cluster = "participantid")

fig4 <- cj_plot(mod_control) + 
  aes(color = attribute) +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  # facet_wrap(. ~ attribute, ncol = 1, scales = "free_y") +
  # theme(strip.background = element_rect(colour="black", fill="lightgrey")) +
  labs(y="") +
  xlim(-0.3,0.3)+
  theme(plot.caption = element_text(hjust = 0.5),
        legend.position = "none")

ggsave(plot = fig4, "figures/fig4.pdf", device = 'pdf', width = 6, height = 5)

#### Figure 5 ####

# Code up party affiliation vis-a-vis subject
cand_t_id_recode <- cand_t_df %>%
  mutate(party_id = str_replace(party_id,"A ",""),
         party_id = str_replace(party_id,"An ",""),
         cand_ideology = ifelse(cand_ideology == "Very liberal",1.67,
                                ifelse(cand_ideology == "Moderate liberal",3.33,
                                       ifelse(cand_ideology == "Centrist",5,
                                              ifelse(cand_ideology == "Moderate conservative",6.67,
                                                     ifelse(cand_ideology == "Very conservative",8.33,NA))))),
         party = ifelse(party_id == party, "Same","Different"),
         cand_ideology2 = abs(ideology - cand_ideology)) %>% 
  mutate(party = factor(party, levels = c("Same","Different")))

mod_t_id_recode <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + 
                                 party + cand_ideology2 + office, 
                               data = cand_t_id_recode, type="ols", cluster = "participantid") %>% 
  mutate(facet = ifelse(attribute %in% c("Ideological Distance","Party","Previous Office"), "Prime","Disclosure"))

fig5 <- cj_plot(mod_t_id_recode) +
  aes(color = attribute) +
  facet_grid(facet ~ ., scales = "free_y", space = "free") +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  xlim(-0.3, 0.3) +
  theme_minimal() +
  labs(y = "") +
  theme(legend.position = "none",
        strip.text = element_text(face="bold"))

ggsave(plot = fig5, "figures/fig5.pdf", device = 'pdf', width = 6, height = 5)

#### Figure 6 ####

mod_basic_vote <- cluster_glm(formula = "vote ~ average + total + largest + prop + origin + issue",
                              data = init_df, 
                              type = "ols", 
                              cluster = "participantid") %>% 
  mutate(facet = ifelse(attribute == "Issue","Issue FE","Disclosure"))

fig6 <- cj_plot(mod_basic_vote) + 
  aes(color = attribute) +
  facet_grid(facet~., space = "free", scales = "free_y") +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  xlim(-0.3,0.3) +
  theme(legend.position = "none",
        strip.text = element_text(face="bold"))

ggsave(plot = fig6, "figures/fig6.pdf", device = 'pdf', width = 6, height = 5)

#### Figure 7 ####

init_vote_issues <- init_df %>%
  group_by(issue) %>%
  nest() %>%
  mutate(model = map(data,init_vote_cj)) %>%
  unnest(model) %>%
  ungroup(issue) %>%
  mutate(issue = case_when(issue == "marij" ~ "Marijuana Legalisation",
                           issue == "wage" ~ "Min. Wage Increase",
                           issue == "bond" ~ "Bond Issuance",
                           issue == "enviro" ~ "Carbon emissions tax"))

fig7 <- cj_plot(init_vote_issues, multi = "issue") +
  facet_grid(~issue) +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  theme(legend.position = "none",
        panel.spacing.x = unit(1,"lines"))

ggsave(plot = fig7, "figures/fig7.pdf", device = 'pdf', width = 9, height = 5)

#### Figure 8 ####

med_1_dem <- cluster_glm(formula = vote ~ average + total + largest + prop + origin, 
                         data = cand_c_df[cand_c_df$party_id == "A Democrat",], type="ols", cluster = "participantid") %>% 
  filter(!is.na(coef_name)) %>% 
  mutate(party_id = "Democrat")

med_1_rep <- cluster_glm(formula = vote ~ average + total + largest + prop + origin, 
                         data = cand_c_df[cand_c_df$party_id == "A Republican",], type="ols", cluster = "participantid") %>% 
  filter(!is.na(coef_name)) %>% 
  mutate(party_id = "Republican")

med_1_ind <- cluster_glm(formula = vote ~ average + total + largest + prop + origin, 
                         data = cand_c_df[cand_c_df$party_id == "An independent",], type="ols", cluster = "participantid") %>% 
  filter(!is.na(coef_name)) %>% 
  mutate(party_id = "Independent")

med_plot <- rbind(med_1_rep, med_1_dem, med_1_ind) %>% 
  mutate(party_id = ifelse(is.na(ci), "Reference category",party_id)) %>% 
  mutate(party_id = factor(party_id, levels = c("Democrat","Independent","Republican","Reference category")))

fig8 <- cj_plot(med_plot, multi = "party_id") +
  scale_color_manual(values = c("#0015bc","mediumorchid4","#ff0000","grey")) +
  geom_text(aes(label = sig), vjust = 0, position = position_dodge(0.5)) +
  theme_minimal() +
  labs(y = "", color = "Party Identification") +
  theme(legend.position = "bottom",
        strip.text = element_text(face="bold")) +
  guides(color=guide_legend(nrow = 2))

ggsave(plot = fig8, "figures/fig8.pdf",width = 7, height =  6.2)

#-------------------------------------------------------------------#
##### APPENDIX FIGURES AND TABLES ####
#-------------------------------------------------------------------#

#### Figure A1 - subjects by state ####

subj_states <- subj_desc %>% 
  group_by_("state","party_id") %>% 
  summarise(`Freq (%)` = round(100*n()/nrow(.),1)) %>% 
  ungroup() %>% 
  rename(State = state,
         `Party ID` = party_id) %>% 
  mutate(State = ifelse(is.na(State),"(Missing)",State))

state_order <- unique(subj_states[order(subj_states$State, decreasing = TRUE),"State"]) %>% 
  filter(State!=("(Missing)")) %>% 
  pull(1)

state_order <- c("(Missing)",state_order)

subj_states$State <- factor(subj_states$State, 
                            levels = state_order)

subj_states$`Party ID` <- ifelse(subj_states$`Party ID` == "A Democrat","Dem.",
                                 ifelse(subj_states$`Party ID` == "A Republican","Rep.",
                                        ifelse(subj_states$`Party ID` == "An independent","Ind.",
                                               ifelse(!is.na(subj_states$`Party ID`),"Other",NA))))

subj_states$`Party ID` <- factor(subj_states$`Party ID`,
                                 levels = c("Dem.","Rep.", "Ind.", "Other"))

fig_a1 <- ggplot(filter(subj_states, !is.na(`Party ID`)),
                aes(x = State, y = `Freq (%)`, fill=`Party ID`)) +
  geom_col() +
  scale_fill_manual(values = c("blue","red","purple","grey")) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "bottom")

ggsave(plot = fig_a1, "figures/subjects_states.pdf", device="pdf", dpi = 300)

#### Table A1 - subject demographic summary ####

numVars <- c("ideology","age")

listVars <- c("gender","ethnic",
              "party_id")

subj_summs <- list()
demog_lengths <- c()
traits <- list()

for (l in 1:length(listVars)) {
  subj_summs[[l]] <- subj_desc %>% 
    group_by_(listVars[l]) %>% 
    summarise(`Freq (\\%)` = round(100*n()/nrow(.),1)) %>% 
    rename(Value = 1)
  
  
  subj_summs[[l]] <- rbind(subj_summs[[l]][!is.na(subj_summs[[l]]$Value),],
                           subj_summs[[l]][is.na(subj_summs[[l]]$Value),])
  
  
  demog_lengths[l] <- nrow(subj_summs[[l]])
  
  traits[[l]] <- c(str_to_title(listVars[l]),
                   rep("",demog_lengths[l]-1))
}

catVars <- do.call(rbind, subj_summs) %>% 
  mutate(Variable = do.call(c,traits)) %>% 
  select(Variable, Value, `Freq (\\%)`) %>% 
  mutate(Value = ifelse(is.na(Value),"(Missing)", Value))

subj_tab <- tibble(Variable = c("Age",""),
                   Value = c("\\textit{Mean}","\\textit{Standard Deviation}"),
                   `Freq (\\%)` = c(mean(subj_desc$age, na.rm = TRUE),
                                    sd(subj_desc$age, na.rm = TRUE))) %>%
  rbind(catVars) %>% 
  rbind(tibble(Variable = c("Ideology",""),
               Value = c("\\textit{Mean}","\\textit{Standard Deviation}"),
               `Freq (\\%)` = c(mean(subj_desc$ideology, na.rm = TRUE),
                                sd(subj_desc$ideology, na.rm = TRUE))))

subj_tab$Variable[subj_tab$Variable == "Party_id"] <- "Party ID"

print(xtable(subj_tab,
             label = "tab:subj_summ",
             caption = "Descriptive summary of key demographics for conjoint experiment subjects."),
      sanitize.text.function = function(x) {x},
      include.rownames = FALSE,
      hline.after = c(-1,
                      which(subj_tab$Variable != "")-1,
                      nrow(subj_tab)),
      file = "tables/subject_summary.tex")

#### Table C1 ####

#  Manually created text table

#### Figures D1 and D2 ####

# Screenshots of conjoint vignettes

#### Figure E1 ####

# Candidate control
mod_control_stability <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + round, 
                                     data = cand_c_df, type="ols", cluster = "participantid") %>% 
  mutate(treat = "Control")

# Candidate treat
mod_t_stability <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + 
                                 party + cand_ideology2 + office + round, 
                               data = cand_t_id_recode, type="ols", cluster = "participantid") %>% 
  mutate(treat = "Treated")

stability_models <- rbind(mod_control_stability, mod_t_stability) %>% 
  mutate(Model = "Stability Check")

original_models <- rbind({mod_control %>% mutate(treat = "Control")}, 
                         {mod_t_id_recode %>% select(-facet) %>% mutate(treat = "Treated")}) %>% 
  mutate(Model = "Original")

stability_df <- rbind(stability_models, original_models) %>% filter(coef_name != "(Intercept)")

fig_e1 <- ggplot(stability_df, aes_string(x = "Estimate", y = "coef_name", color = "Model", shape = "Model")) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_point() +
  geom_errorbarh(height = 0.1,
                 aes(xmin = Estimate - ci, xmax = Estimate + ci)) + 
  labs(x = "AMCE", y = "Attribute-level") +
  scale_color_manual(values = c("black","red")) +
  facet_wrap(.~ treat) +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  theme_minimal() +
  labs(y = "") +
  theme(legend.position = "bottom",
        strip.text = element_text(face="bold"))

ggsave(plot = fig_e1, "figures/candidate_stability.pdf", device = 'pdf', width = 6, height = 5)

#### Table E1 ####

summary_stats_cand <- cand_c_df %>%
  mutate(office = NA,
         party = NA,
         cand_ideology = NA) %>%
  rbind(cand_t_df) %>%
  gather(average, largest, prop, origin, total, party, cand_ideology, office, key = "attribute", value = "level") %>%
  select(attribute, level, group) %>%
  group_by(group, attribute, level) %>%
  summarise(freq = n()) %>%
  ungroup()

total_attributes_cand <- summary_stats_cand %>%
  group_by(group, attribute) %>%
  summarise(total = sum(freq)) %>%
  ungroup()

summary_stats_table_cand <- summary_stats_cand %>% 
  left_join(total_attributes_cand, by = c("group", "attribute")) %>%
  mutate(proportion = freq/total) %>%
  select(-freq, -total) %>%
  spread(group, proportion) %>% 
  mutate(attribute = str_to_title(attribute),
         attribute = gsub("Cand_ideology","Cand. Ideology", attribute),
         level = ifelse(is.na(level),"--",level)) %>% 
  rename(Attribute = attribute,
         Level = level,
         Control = control,
         Treat  = treat)

print(xtable(summary_stats_table_cand,
             caption = "Balance test: proportion of times each attribute-level was displayed to participants in the candidate conjoints",
             label = "tab:cand_bt_level_prop",
             digits = 2),
      include.rownames = FALSE,
      file = "tables/cand_bt_level_proportions.tex")

#### Table E2 ####

summary_stats_init <- init_df %>%
  gather(average, largest, prop, origin, total, key = "attribute", value = "level") %>%
  select(attribute, level, issue) %>%
  group_by(issue,attribute, level) %>%
  summarise(freq = n()) %>%
  ungroup()

total_attributes_init <- summary_stats_init %>%
  group_by(issue, attribute) %>%
  summarise(total = sum(freq)) %>%
  ungroup()

summary_stats_table_init <- summary_stats_init %>% 
  left_join(total_attributes_init, by = c("issue", "attribute")) %>%
  mutate(proportion = freq/total) %>%
  select(-freq, -total) %>%
  spread(issue, proportion) %>% 
  mutate(attribute = str_to_title(attribute)) %>% 
  rename(Attribute = attribute,
         Level = level,
         Bond = bond,
         `Enviro.` = enviro,
         `Marij.` = marij,
         `Wage` = wage)

print(xtable(summary_stats_table_init,
             caption = "Balance test: proportion of times each attribute-level was displayed to participants in the initiative conjoint",
             label = "tab:init_bt_level_prop",
             digits = 2),
      include.rownames = FALSE,
      file = "tables/init_bt_level_proportions.tex")

#### Figure F1 ####

cand_c_df_mm <- cand_c_df %>%
  filter(party_id  %in% c("A Republican", "A Democrat", "An independent")) %>% 
  mutate(total = as.factor(total),
         origin = as.factor(origin),
         party_id = as.factor(party_id))

cand_c_mm <- cj(data = cand_c_df_mm,
                formula = vote ~ average + total + largest + prop + origin,
                by = ~ party_id,
                estimate = "mm_diff") %>% 
  mutate(sig = ifelse(p < 0.001,"***",
                      ifelse(p<0.01,"**",
                             ifelse(p<0.05,"*",""))),
         Comparison = ifelse(BY == 'A Republican - A Democrat','Rep. - Dem.',
                             ifelse(BY == 'An independent - A Democrat','Ind. - Dem.',NA)),
         level = factor(level, levels = ))

fig_f1 <- ggplot(cand_c_mm, aes(x = estimate, y = level, color = feature, shape = Comparison)) +
  geom_point() +
  geom_vline(xintercept = 0) +
  geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0) +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  labs(x = "Estimated Difference", y = "", shape = "Comparison") +
  aes(color = feature, show_guide = FALSE) +
  theme_minimal() +
  guides(color=FALSE) +
  theme(legend.position = "bottom")

ggsave(plot = fig_f1, "figures/candidate_control_mm.pdf", device = 'pdf', width = 6, height = 5)

#### Table F1 -- z score tests of beta differences ####

# Calculate coef. difference
cand_beta_diff_dr <- med_1_dem %>% 
  left_join(med_1_rep, by = "coef_name") %>% 
  mutate(diff = Estimate.x - Estimate.y) %>% 
  mutate(z = abs(diff/sqrt((`Std. Error.x`)^2 + (`Std. Error.y`)^2))) %>% 
  mutate(p = 2*pnorm(-abs(z))) %>% 
  
  select(coef_name, Estimate.x, `Std. Error.x`,Estimate.y, `Std. Error.y`,diff, z, p) %>% 
  filter(!is.na(z))

cand_beta_diff_di <- med_1_dem %>% 
  left_join(med_1_ind, by = "coef_name") %>% 
  mutate(diff = Estimate.x - Estimate.y) %>% 
  mutate(z = abs(diff/sqrt((`Std. Error.x`)^2 + (`Std. Error.y`)^2))) %>% 
  mutate(p = 2*pnorm(-abs(z))) %>% 
  
  select(coef_name, Estimate.x, `Std. Error.x`,Estimate.y, `Std. Error.y`,diff, z, p) %>% 
  filter(!is.na(z))

cand_beta_diff_ri <- med_1_rep %>% 
  left_join(med_1_ind, by = "coef_name") %>% 
  mutate(diff = Estimate.x - Estimate.y) %>% 
  mutate(z = abs(diff/sqrt((`Std. Error.x`)^2 + (`Std. Error.y`)^2))) %>% 
  mutate(p = 2*pnorm(-abs(z))) %>% 
  
  select(coef_name, Estimate.x, `Std. Error.x`,Estimate.y, `Std. Error.y`,diff, z, p) %>% 
  filter(!is.na(z))

cand_beta_diff_dr %>% select(coef_name, Estimate.x, Estimate.y, diff,z,p) %>%
  rename(Democrat = Estimate.x, Republican = Estimate.y, dr = diff, dr_z = z) %>%
  
  left_join({cand_beta_diff_di %>% select(coef_name, Estimate.y, diff, z) %>% rename(Independent = Estimate.y, di = diff, di_z = z)},
            by = "coef_name") %>%
  
  left_join({cand_beta_diff_ri %>% select(coef_name, diff, z) %>% rename(ri = diff, ri_z = z)},
            by = "coef_name") %>%
  
  select(coef_name, Democrat, Republican, Independent, dr, dr_z, di, di_z, ri, ri_z) %>%
  
  xtable(., digits = 3) %>%
  
  print(include.rownames = FALSE,
        include.colnames = FALSE,
        hline.after = NULL,
        only.contents = TRUE,
        file = "tables/cand_pid_beta_comp.tex")

#### Table F2 ####

med_init_dem <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + issue, 
                            data = init_df[init_df$party_id == "A Democrat",], type="ols", cluster = "participantid") %>% 
  filter(!is.na(coef_name)) %>% 
  mutate(party_id = "Democrat")

med_init_ind <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + issue, 
                            data = init_df[init_df$party_id == "An independent",], type="ols", cluster = "participantid") %>% 
  filter(!is.na(coef_name)) %>% 
  mutate(party_id = "Independent")

med_init_rep <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + issue, 
                            data = init_df[init_df$party_id == "A Republican",], type="ols", cluster = "participantid") %>% 
  filter(!is.na(coef_name)) %>% 
  mutate(party_id = "Republican")

med_plot_init <- rbind(med_init_dem, med_init_rep, med_init_ind) %>% 
  mutate(party_id = ifelse(is.na(ci), "Reference category",party_id)) %>% 
  mutate(party_id = factor(party_id, levels = c("Democrat","Independent","Republican","Reference category")))

init_beta_diff_dr <- med_init_dem %>% 
  left_join(med_init_rep, by = "coef_name") %>% 
  mutate(diff = Estimate.x - Estimate.y) %>% 
  mutate(z = abs(diff/sqrt((`Std. Error.x`)^2 + (`Std. Error.y`)^2))) %>% 
  mutate(p = 2*pnorm(-abs(z))) %>% 
  
  select(coef_name, Estimate.x, `Std. Error.x`,Estimate.y, `Std. Error.y`,diff, z, p) %>% 
  filter(!is.na(z))

init_beta_diff_di <- med_init_dem %>% 
  left_join(med_init_ind, by = "coef_name") %>% 
  mutate(diff = Estimate.x - Estimate.y) %>% 
  mutate(z = abs(diff/sqrt((`Std. Error.x`)^2 + (`Std. Error.y`)^2))) %>% 
  mutate(p = 2*pnorm(-abs(z))) %>% 
  
  select(coef_name, Estimate.x, `Std. Error.x`,Estimate.y, `Std. Error.y`,diff, z, p) %>% 
  filter(!is.na(z))

init_beta_diff_ri <- med_init_rep %>% 
  left_join(med_init_ind, by = "coef_name") %>% 
  mutate(diff = Estimate.x - Estimate.y) %>% 
  mutate(z = abs(diff/sqrt((`Std. Error.x`)^2 + (`Std. Error.y`)^2))) %>% 
  mutate(p = 2*pnorm(-abs(z))) %>% 
  
  select(coef_name, Estimate.x, `Std. Error.x`,Estimate.y, `Std. Error.y`,diff, z, p) %>% 
  filter(!is.na(z))

init_beta_diff_dr %>% select(coef_name, Estimate.x, Estimate.y, diff,z,p) %>%
  rename(Democrat = Estimate.x, Republican = Estimate.y, dr = diff, dr_z = z) %>%
  
  left_join({init_beta_diff_di %>% select(coef_name, Estimate.y, diff, z) %>% rename(Independent = Estimate.y, di = diff, di_z = z)},
            by = "coef_name") %>%
  
  left_join({init_beta_diff_ri %>% select(coef_name, diff, z) %>% rename(ri = diff, ri_z = z)},
            by = "coef_name") %>%
  
  select(coef_name, Democrat, Republican, Independent, dr, dr_z, di, di_z, ri, ri_z) %>%
  
  xtable(., digits = 3) %>%
  
  print(include.rownames = FALSE,
        include.colnames = FALSE,
        hline.after = NULL,
        only.contents = TRUE,
        file = "tables/init_pid_beta_comp.tex")

#### Figure F2 -- control model by ideology subsets ####

mod_c_id_left <- cluster_glm(formula = vote ~ average + total + largest + prop + origin, 
                             data = cand_c_df[cand_c_df$ideology <= 5,], type="ols", cluster = "participantid") %>% 
  mutate(ids = "Left (<= 5)")

mod_c_id_right <- cluster_glm(formula = vote ~ average + total + largest + prop + origin, 
                              data = cand_c_df[cand_c_df$ideology > 5,], type="ols", cluster = "participantid") %>% 
  mutate(ids = "Right (> 5)")

mod_c_comb_ids <- rbind(mod_c_id_left, mod_c_id_right) %>% 
  mutate(ids = ifelse(is.na(ci), "Reference category",ids)) %>% 
  mutate(ids = factor(ids, levels = c("Left (<= 5)","Right (> 5)","Reference category")))

fig_f2 <- cj_plot(mod_c_comb_ids, multi = "ids") +
  aes(color = ids) +
  scale_color_manual(values = c("deepskyblue2","coral2","grey")) +
  geom_text(aes(label = sig), vjust = 0, position = position_dodge(0.7), show.legend = FALSE) +
  theme_minimal() +
  labs(y = "", color = "Party Identification") +
  guides(color=guide_legend(ncol=2)) +
  theme(legend.position = "bottom",
        strip.text = element_text(face="bold"))

ggsave(plot = fig_f2, "figures/candidate_control_ideology_mods.pdf", device = 'pdf', width = 7, height = 7.5)

#### Figure F3 - separate models by party ID ####
mod_t_id_dem <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + 
                              party + cand_ideology + office, 
                            data = cand_t_df[cand_t_df$party_id == "A Democrat",], type="ols", cluster = "participantid") %>% 
  mutate(facet = ifelse(attribute %in% c("Candidate Ideology","Party","Previous Office"), "Prime","Disclosure"),
         r_pid = "Democrat")

mod_t_id_ind <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + 
                              party + cand_ideology + office, 
                            data = cand_t_df[cand_t_df$party_id == "An independent",], type="ols", cluster = "participantid") %>% 
  mutate(facet = ifelse(attribute %in% c("Candidate Ideology","Party","Previous Office"), "Prime","Disclosure"),
         r_pid = "Independent")

mod_t_id_rep <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + 
                              party + cand_ideology + office, 
                            data = cand_t_df[cand_t_df$party_id == "A Republican",], type="ols", cluster = "participantid") %>% 
  mutate(facet = ifelse(attribute %in% c("Candidate Ideology","Party","Previous Office"), "Prime","Disclosure"),
         r_pid = "Republican")

mod_t_comb_pids <- rbind(mod_t_id_dem,mod_t_id_rep, mod_t_id_ind) %>% 
  mutate(r_pid = ifelse(is.na(ci), "Reference category",r_pid)) %>% 
  mutate(r_pid = factor(r_pid, levels = c("Democrat","Independent","Republican","Reference category")))

fig_f3 <- cj_plot(mod_t_comb_pids, multi = "r_pid") +
  aes(color = r_pid) +
  scale_color_manual(values = c("#0015bc","mediumorchid4","#ff0000","grey")) +
  facet_grid(facet ~ ., scales = "free_y", space = "free") +
  geom_text(aes(label = sig), vjust = 0, position = position_dodge(0.5), show.legend = FALSE) +
  theme_minimal() +
  labs(y = "", color = "Party Identification") +
  guides(color=guide_legend(ncol=2)) +
  theme(legend.position = "bottom",
        strip.text = element_text(face="bold"))

ggsave(plot = fig_f3, "figures/candidate_treat_party_mods.pdf", device = 'pdf', width = 7, height = 7.5)

#### Figure F4 -- treatment model by ideology subsets ####

mod_t_id_left <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + 
                               party + cand_ideology + office, 
                             data = cand_t_df[cand_t_df$ideology <= 5,], type="ols", cluster = "participantid") %>% 
  mutate(facet = ifelse(attribute %in% c("Candidate Ideology","Party","Previous Office"), "Prime","Disclosure"),
         ids = "Left (<= 5)")

mod_t_id_right <- cluster_glm(formula = vote ~ average + total + largest + prop + origin + 
                                party + cand_ideology + office, 
                              data = cand_t_df[cand_t_df$ideology > 5,], type="ols", cluster = "participantid") %>% 
  mutate(facet = ifelse(attribute %in% c("Candidate Ideology","Party","Previous Office"), "Prime","Disclosure"),
         ids = "Right (> 5)")

mod_t_comb_ids <- rbind(mod_t_id_left, mod_t_id_right) %>% 
  mutate(ids = ifelse(is.na(ci), "Reference category",ids)) %>% 
  mutate(ids = factor(ids, levels = c("Left (<= 5)","Right (> 5)","Reference category")))

fig_f4 <- cj_plot(mod_t_comb_ids, multi = "ids") +
  aes(color = ids) +
  scale_color_manual(values = c("deepskyblue2","coral2","grey")) +
  facet_grid(facet ~ ., scales = "free_y", space = "free") +
  geom_text(aes(label = sig), vjust = 0, position = position_dodge(0.5), show.legend = FALSE) +
  theme_minimal() +
  labs(y = "", color = "Party Identification") +
  guides(color=guide_legend(ncol=2)) +
  theme(legend.position = "bottom",
        strip.text = element_text(face="bold"))

ggsave(plot = fig_f4, "figures/candidate_treat_ideology_mods.pdf", device = 'pdf', width = 7, height = 7.5)

#### Figure F5 - diff. in marginal means for party id (treat) ####
cand_t_df_mm <- cand_t_df %>%
  filter(party_id  %in% c("A Republican", "A Democrat", "An independent")) %>% 
  mutate(total = as.factor(total),
         origin = as.factor(origin),
         party  = as.factor(party),
         cand_ideology = as.factor(cand_ideology),
         office = as.factor(office),
         party_id = as.factor(party_id))

cand_t_mm <- cj(data = cand_t_df_mm,
                formula = vote ~ average + total + largest + prop + origin + party + cand_ideology + office,
                by = ~ party_id,
                estimate = "mm_diff") %>% 
  mutate(sig = ifelse(p < 0.001,"***",
                      ifelse(p<0.01,"**",
                             ifelse(p<0.05,"*","")))) %>% 
  rename(Comparison = BY)

fig_f5 <- ggplot(cand_t_mm, aes(x = estimate, y = level, color = feature, shape = Comparison)) +
  geom_point() +
  guides(color=FALSE) +
  geom_vline(xintercept = 0) +
  geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0) +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  labs(x = "Estimated Difference", y = "") +
  guides(shape=guide_legend(ncol=1)) + 
  theme_minimal() +
  theme(legend.position = "bottom")

ggsave(plot = fig_f5, "figures/candidate_treat_mm.pdf", device = 'pdf', width = 6, height = 5)

#### Figure F6 - initiative models with partisan identity control ####
mod_basic_vote_pid <- cluster_glm(formula = "vote ~ average + total + largest + prop + origin + issue + party_id",
                                  data = init_df, 
                                  type = "ols", 
                                  cluster = "participantid") %>% 
  mutate(facet = ifelse(attribute == "Issue","Issue FE","Disclosure"))

fig_f6 <- cj_plot(mod_basic_vote_pid) + 
  aes(color = attribute) +
  facet_grid(facet~., space = "free", scales = "free_y") +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  xlim(-0.3,0.3) +
  theme(legend.position = "none",
        strip.text = element_text(face="bold"))

ggsave(plot = fig_f6, "figures/init_vote_all_pid_control.pdf", device = 'pdf', width = 6, height = 5)

#### Figure F7 - marginal means between issues ####
init_df_mm <- init_df %>%
  mutate(total = as.factor(total),
         origin = as.factor(origin),
         party_id = as.factor(party_id),
         
         issue = ifelse(issue == "enviro","Enviro.",issue),
         issue = ifelse(issue == "bond","Bond",issue),
         issue = ifelse(issue == "marij","Marij.",issue),
         issue = ifelse(issue == "wage","Wage",issue),
         
         issue = as.factor(issue))

init_mm_issues <- cj(data = init_df_mm,
                     formula = vote ~ average + total + largest + prop + origin,
                     by = ~ issue,
                     estimate = "mm_diff") %>% 
  mutate(sig = ifelse(p < 0.001, "***",
                      ifelse(p < 0.01, "**",
                             ifelse(p < 0.05,"*",""))))

fig_f7 <- ggplot(init_mm_issues, aes(x = estimate, y = level, color = feature, shape = BY)) +
  geom_point() +
  geom_vline(xintercept = 0) +
  geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0) +
  geom_text(aes(label = sig), vjust = 0, nudge_y = 0.05) +
  labs(x = "Estimated Difference", y = "") +
  theme_minimal() +
  guides(color=FALSE)+
  labs(shape = "Issues") +
  theme(legend.position = "bottom")

ggsave(plot = fig_f7, "figures/init_issues_mm.pdf", device = 'pdf', width = 7, height = 5)

#### Figure F8 -- models per partisan identity for initiative conjoint ####

fig_f8 <- cj_plot(med_plot_init, multi = "party_id") +
  scale_color_manual(values = c("#0015bc","mediumorchid4","#ff0000","grey")) +
  geom_text(aes(label = sig), vjust = 0, position = position_dodge(0.5)) +
  theme_minimal() +
  labs(y = "", color = "Party Identification") +
  guides(color=guide_legend(ncol=2)) +
  theme(legend.position = "bottom",
        strip.text = element_text(face="bold"))

ggsave(plot = fig_f8, "figures/init_pid_diff.pdf",width = 7, height = 7)

#### Table G1 -- initiative support ####

vote_desc <- init_df %>%
  filter(camp == "Support") %>%
  group_by(issue) %>%
  summarise(Vote = mean(vote))

rate_desc <- init_df %>%
  group_by(camp, issue) %>%
  summarise(approval = mean(rate)) %>%
  spread(key = camp, value = approval)

desc_tabl <- vote_desc %>%
  cbind(rate_desc[,c("Oppose","Support")]) %>%
  mutate(issue = ifelse(issue == "marij","Marijuana legalisation",
                        ifelse(issue == "enviro","Environment tax",
                               ifelse(issue == "bond","Bond issuance",
                                      ifelse(issue == "wage","Wage increase",NA))))) %>%
  rename(Issue = issue) %>%
  xtable(., caption = 'Subject support and rating of initiative campaigns',
         label = "tab:init_support",
         digits = 2)

print(desc_tabl, file = "tables/init_suppport.tex",
      include.rownames = FALSE)

#### Table H1 -- z-score differences ####

compare_mods <- mod_t_id_recode %>% 
  select(coef_name, Estimate, `Std. Error`) %>% 
  rename(beta_t = Estimate, sigma_t = `Std. Error`) %>% 
  left_join(
    {
      mod_control %>% 
        select(coef_name, Estimate, `Std. Error`, sig) %>% 
        rename(beta_c = Estimate, sigma_c = `Std. Error`) %>% 
        mutate(level = rownames(mod_control))
    },
    by = "coef_name") %>% 
  
  mutate(Z_score = (beta_c - beta_t)/sqrt(sigma_t^2 + sigma_c^2)) %>% 
  mutate(p = 2*pnorm(-abs(Z_score))) %>% 
  filter(!is.na(Z_score),
         !is.na(coef_name)) %>% 
  mutate(prev_sig = (sig != ""),
         coef_name = gsub("%", "\\\\%", coef_name)) %>% 
  mutate(coef_name = ifelse(prev_sig, bold_text_wrapper(coef_name), coef_name)) %>% 
  mutate(coef_name = gsub("\\$","\\\\$", coef_name)) %>% 
  select(coef_name, beta_c, beta_t, Z_score, p)

xtable(compare_mods) %>% 
  print(only.contents = TRUE,
        hline.after = NULL,
        digits = 2,
        include.colnames = FALSE,
        include.rownames = FALSE,
        sanitize.text.function = function (x) {x},
        file = "tables/model_compare_z_score.tex")

#### Table H2 -- post hoc power test ####

set.seed(89)

control_mod <- glm.cluster(formula = vote ~ average + total + largest + prop + origin,
                           data = cand_c_df,
                           cluster = "participantid",
                           weights = NULL,
                           family = gaussian)

summary(control_mod)

n_obs <- (390/2)*2*5

att_levels <- c(3,3,5,3,2)


sim_loop <- function(conf_level) {
  
  assign_matrices <- lapply(att_levels, function (x) {
    
    assigned_levels <- sample(x, size = n_obs, replace = TRUE)
    att_matrix <- matrix(nrow = n_obs, ncol = x)
    
    for (i in 1:x) {
      
      att_matrix[,i] <- ifelse(assigned_levels == i, 1,0)
      
    }
    
    return(att_matrix[,-x])
    
    
  })
  
  treat_matrix <- do.call("cbind", assign_matrices) %>% 
    cbind(1, .)
  
  Y <- rnorm(n_obs, mean = treat_matrix %*% control_mod$glm_res$coefficients, sd = sd(control_mod$glm_res$residuals))
  
  sim_data <- cbind(Y = ifelse(Y < 0.5,0,1), treat_matrix[,2:12]) %>% 
    as.data.frame(.)
  
  sim_mod <- glm(Y~., data = sim_data)
  return(summary(sim_mod)$coefficients[-1,4] < conf_level)
}

sim_results <- replicate(1000, sim_loop(0.1), simplify = FALSE)

sim_power <- colMeans(do.call("rbind", sim_results))
names(sim_power) <- mod_control$coef_name[2:12]

post_hoc_table <- data.frame(coef = gsub("%", "\\\\%", names(sim_power)),
                             prev_sig = mod_control$`Pr(>|t|)`[2:12] < 0.05) %>% 
  
  mutate(prev_sig_str = ifelse(prev_sig,"\\textbf{Yes}","No"),
         power = sim_power
  ) %>%
  mutate(coef = ifelse(prev_sig, bold_text_wrapper(coef), coef)) %>% 
  select(-prev_sig) %>% 
  mutate(coef = gsub("\\$","\\\\$", coef))

xtable(post_hoc_table, digits = 3) %>%
  print(include.rownames = FALSE,
        include.colnames = FALSE,
        only.contents = TRUE,
        file = "tables/post_hoc_table.tex",
        hline.after = NULL,
        sanitize.text.function = function (x) {x})


#-------------------------------------------------------------------#
#### Analysis reported in appendix text ####
#-------------------------------------------------------------------#

## Robustness test - profile-order check (section E) ####
mod_control_po <- cluster_glm(formula = vote ~ cand*(average + total + largest + prop + origin), 
                              data = cand_c_df, type="ols", cluster = "participantid") %>%
  mutate(sig = ifelse(`Pr(>|t|)` < 0.001,"***",
                      ifelse(`Pr(>|t|)` < 0.01, "**",
                             ifelse(`Pr(>|t|)` < 0.05,"*",""))))
